from bs4 import BeautifulSoup
import requests
from requests_testadapter import Resp
import os

# Output CSV. The handle ``f`` is opened at import time, receives the header
# line immediately, and stays open as a module-level name for later writes.
filename = "reuters.csv"
headers = "Date, Title, Description\n"
f = open(filename, "w")
f.write(headers)

# Location of the first saved page (file:// URL on the local disk).
url = 'file://C:/Users/Daniel/Desktop/Python/Factiva/1.html'

class LocalFileAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that answers requests by reading a local file.

    The request URL, minus its first seven characters (the length of the
    ``file://`` prefix), is used as the filesystem path.
    """

    def build_response_from_file(self, request):
        """Return a response whose body is the content of the requested file.

        Args:
            request: The prepared request; only ``request.url`` is read.

        Returns:
            The object produced by ``self.build_response`` for a ``Resp``
            wrapping the file's bytes.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        local_path = request.url[len('file://'):]
        with open(local_path, 'rb') as handle:
            payload = bytearray(handle.read())

        raw_response = Resp(payload)
        return self.build_response(request, raw_response)

    def send(self, request, stream=False, timeout=None,
             verify=True, cert=None, proxies=None):
        """Serve ``request`` from disk.

        ``stream``, ``timeout``, ``verify``, ``cert`` and ``proxies`` are
        accepted to match the adapter interface but are not used.
        """
        return self.build_response_from_file(request)

# Shared session: any URL starting with 'file://' is routed to a
# LocalFileAdapter instead of the default HTTP adapters.
requests_session = requests.Session()
requests_session.mount('file://', LocalFileAdapter())

def _parse_article(article):
    """Extract ``(date, headline, summary)`` from one headlines-table row.

    Args:
        article: A BeautifulSoup ``<tr>`` tag taken from the headlines table.

    Returns:
        A ``(date, headline, summary)`` tuple of strings, or ``None`` when the
        row has no ``<b>`` headline, no ``snippet ensnippet`` div or no
        ``leadFields`` div (i.e. it is not an article row).
    """
    headline_tag = article.b
    snippet_tag = article.find("div", {"class": "snippet ensnippet"})
    lead_tag = article.find("div", {"class": "leadFields"})
    if headline_tag is None or snippet_tag is None or lead_tag is None:
        return None

    headline = headline_tag.text

    # Commas become semicolons because the CSV line is assembled by hand;
    # split/join collapses newlines and runs of whitespace to single spaces.
    summary = ' '.join(snippet_tag.text.replace(",", ";").split())
    marker = "(Reuters) -"
    if marker in summary:
        # Cut at the marker itself rather than at the first '-' so a
        # hyphenated dateline ("WINSTON-SALEM (Reuters) - ...") is handled.
        summary = summary.partition(marker)[2].lstrip()

    # The first 21 characters of the leadFields text are skipped (presumably
    # a fixed-width source prefix -- TODO confirm against the saved pages).
    # The date is whatever precedes the next comma; partition keeps the whole
    # remainder when there is no comma instead of chopping its last character.
    date = lead_tag.text[21:].partition(',')[0]

    return date, headline, summary


def scraper(url):
    """Scrape one saved headlines page and append its articles to the CSV.

    Fetches ``url`` through the module-level ``requests_session``, takes the
    first ``<table class="headlines">`` and writes one
    ``date,headline,summary`` line per article row to the module-level file
    handle ``f``. Commas inside the headline and summary are replaced with
    semicolons. Rows that are missing any expected element are skipped, so a
    malformed row never re-emits the previous row's values.

    Args:
        url: Address of the page to scrape (the driver passes ``file://``
            URLs of locally saved pages).

    Raises:
        IndexError: If the page contains no ``<table class="headlines">``.
    """
    resp = requests_session.get(url)
    soup = BeautifulSoup(resp.text, 'lxml')
    article_section = soup.find_all("table", {"class": "headlines"})[0]

    for article in article_section.find_all('tr'):
        parsed = _parse_article(article)
        if parsed is None:
            continue
        date, headline, summary = parsed
        f.write(date + "," + headline.replace(",", ";") + "," + summary + "\n")

    # Progress message once per page. The label is the file name without its
    # extension (".../3.html" -> "3"), taken from the URL so the function does
    # not depend on a loop variable defined by the caller.
    page_label = url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    print(page_label + ' done')


# Scrape the saved pages 1.html through 14.html in order.
try:
    for n in range(1, 15):
        scraper('file://C:/Users/Daniel/Desktop/Python/Factiva/' + str(n) + '.html')
finally:
    # Close the CSV even when a page fails, so rows still sitting in the
    # write buffer are flushed to disk instead of being lost.
    f.close()

#use requests to open first page
#read and copy csv
#open next page

